***
*** This do file requires LEHD files from https://lehd.ces.census.gov/data/
***


*****Primary jobs held by people earning under $1250 per month, counted at the job location
* Seed temp.dta with a single placeholder row (blah == 1) so that the first
* -append- inside the loop has an existing file to read.
clear
set obs 1
gen blah = 1
save temp, replace

local states dc md va
foreach state of local states {
	* LEHD job counts for one state.
	* JT01 in the file name marks primary jobs. Column 6 is used as the count
	* of jobs paying under $1250 per month; column 40 is named jobs_bacplus
	* (presumably bachelor's degree or higher -- confirm against the LEHD layout).
	insheet using "LEHD Data/`state'_wac_S000_JT01_2014.csv", nonames clear

	* With -nonames- the header line arrives as the first data row: discard it.
	drop in 1

	rename v40 jobs_bacplus
	rename v6 jobs_lt1250
	rename v1 w_geocode
	keep w_geocode jobs_lt1250 jobs_bacplus

	* The header row forced every column to string; convert the counts back.
	destring jobs_*, replace

	* Stack the states accumulated so far underneath this one.
	append using temp
	save temp, replace
}

* Remove the placeholder row together with its marker variable.
keep if blah != 1
drop blah

save temp, replace


*****Resident counts for each census block: total, by age group, low earners, white, black, education
* Same seed-and-append pattern as used for the job counts, this time
* accumulating into temp1.dta.
clear
set obs 1
gen blah = 1
save temp1, replace

local states dc md va
foreach state of local states {
	insheet using "LEHD Data/`state'_rac_S000_JT01_2014.csv", nonames clear

	* With -nonames- the header line arrives as the first data row: discard it.
	drop in 1

	* The block identifier is named w_geocode so that it can serve as the
	* merge key against the job-count file.
	* NOTE(review): in this file the identifier presumably refers to the
	* block of residence rather than the block of work -- confirm.
	rename v1 w_geocode
	rename v40 res_bacplus
	rename v39 res_somecoll
	rename v30 res_black
	rename v29 res_white
	rename v6 res_lt1250
	rename v5 res_age_gt55
	rename v4 res_age_30_54
	rename v2 res_tot
	keep w_geocode res_*

	* The header row forced every column to string; convert the counts back.
	destring res_*, replace

	* Stack the states accumulated so far underneath this one.
	append using temp1
	save temp1, replace
}

* Remove the placeholder row together with its marker variable.
keep if blah != 1
drop blah


*****Merging job locations and residential locations
* In memory: resident counts by block (res_*). In temp.dta: job counts by
* block (jobs_*). Blocks that appear in only one of the two files are kept.
merge 1:1 w_geocode using temp

* A block found in only one file has no value for the other file's counts.
* Those are treated as zeros. Only the missing side is filled in: counts that
* were actually observed for an unmatched block (for example jobs located in
* a block with no resident workers) are left untouched rather than zeroed.
foreach var of varlist jobs_lt1250 jobs_bacplus res_lt1250 res_white res_black res_bacplus res_tot res_age_30_54 res_age_gt55 res_somecoll {
	replace `var' = 0 if missing(`var') & _merge != 3
}
drop _merge

save temp, replace


*****Geography info: state, county, tract and cbsa for every census block
* temp1.dta is re-seeded with a placeholder row (blah == 1) and then used to
* accumulate the crosswalk files of the three states.
clear
set obs 1
gen blah = 1
save temp1, replace

local states dc md va
foreach state of local states {
	insheet using "LEHD Data/`state'_xwalk.csv", nonames clear

	* With -nonames- the header line arrives as the first data row: discard it.
	drop in 1

	rename v11 cbsa
	rename v7 tract
	rename v6 county
	rename v4 state
	rename v1 w_geocode

	* Every column that was not renamed above still starts with v.
	drop v*

	append using temp1
	save temp1, replace
}

* Remove the placeholder row (the blah variable itself stays in the data).
keep if blah != 1

* Attach the job and resident counts saved in temp.dta to the crosswalk
* currently in memory, one row per census block.
merge 1:1 w_geocode using temp.dta

* Crosswalk blocks with no row in temp.dta have no counts; record them as
* zeros. Only missing values are filled in, so counts observed for a block
* that is absent from the crosswalk are not overwritten with zero.
foreach var of varlist jobs_lt1250 jobs_bacplus res_lt1250 res_white res_black res_bacplus res_tot res_age_30_54 res_age_gt55 res_somecoll {
	replace `var' = 0 if missing(`var') & _merge != 3
}
drop _merge


*****sample selection: DC area counties, collapse to tracts
* Keep only blocks whose county name is one of the eight listed jurisdictions.
keep if inlist(county, "District of Columbia, DC", "Montgomery County, MD", ///
	"Prince George's County, MD", "Alexandria city, VA", ///
	"Arlington County, VA", "Falls Church city, VA", ///
	"Fairfax County, VA", "Fairfax city, VA")

* Add the block-level counts up to one row per state, county and tract.
local counts jobs_lt1250 jobs_bacplus res_lt1250 res_white res_black res_bacplus res_tot res_age_30_54 res_age_gt55 res_somecoll
collapse (sum) `counts', by(state county tract)
save temp, replace


*****adding centroid coordinates
* Read the tab-delimited gazetteer file and keep the tract identifier, the
* pop10 column and the latitude and longitude columns.
insheet using "LEHD Data/gazetteer.txt", nonames tab clear

* With -nonames- the header line arrives as the first data row: discard it.
drop in 1

rename v10 longitude
rename v9 latitude
rename v3 pop10
rename v2 tract
drop v*

* Coordinates were read as strings because of the header row.
destring latitude longitude, replace

* Attach the tract-level counts. Gazetteer tracts outside the sample are
* dropped; sample tracts with no gazetteer row are retained, and for those
* latitude and longitude are missing.
merge 1:1 tract using temp
keep if _merge != 1
drop _merge


*****calculating distances between all tracts
* For every tract x this creates distance_x, the distance in miles (option
* -mi-) from each tract in the data to tract x. One new variable is added
* per tract, so the number of variables grows with the number of tracts.
* NOTE(review): geodist is not an official Stata command (presumably the SSC
* package) -- confirm it is installed before running.
*
* The holders for the reference point are stored as double. A default float
* would round the coordinates of tract x before the distance is computed,
* while latitude and longitude keep the precision that -destring- gave them.
gen double lattemp = .
gen double longtemp = .
forvalues x = 1(1)`=_N' {
	* Copy the coordinates of tract x into every row as the reference point.
	qui replace lattemp = latitude[`x']
	qui replace longtemp = longitude[`x']
	geodist latitude longitude lattemp longtemp, gen(distance_`x') mi
}
drop lattemp longtemp


*****avg distance to jobs for each tract
* avg_distance is the job-weighted mean of distance_1 ... distance_N, where
* the weight of tract j is its share of all jobs paying under $1250 a month.
egen tot_jobs = total(jobs_lt1250)
gen avg_distance = 0

local ntracts = _N
forvalues j = 1/`ntracts' {
	* Add the contribution of tract j: distance to j times the job share of j.
	qui replace avg_distance = avg_distance + distance_`j' * (jobs_lt1250[`j'] / tot_jobs)
}


*****average distance for all, white, black
* Summarize avg_distance over District of Columbia tracts three times,
* weighting in turn by low-earning residents, white residents and black
* residents.
foreach wgt in res_lt1250 res_white res_black {
	su avg_distance [aweight = `wgt'] if state == "District of Columbia"
}

*****graph
* fracbacplus: res_bacplus as a percentage of residents in the two older age
* groups (30 to 54, and 55 and over).
* Alternative numerator that also counts some college, kept for reference:
*gen fracbacplus = (res_bacplus + res_somecoll) / (res_age_30_54 + res_age_gt55)
gen fracbacplus = res_bacplus / (res_age_30_54 + res_age_gt55) * 100

* Scatter with fitted line, District of Columbia tracts only.
twoway (scatter fracbacplus avg_distance) (lfit fracbacplus avg_distance) ///
	if state == "District of Columbia", ///
	scheme(s2color) graphregion(color(white)) legend(off) ///
	ytitle(Percent College Educated) ///
	xtitle(Average Distance to Job Location in Miles)


* fracwhite: white residents as a percentage of all residents in the tract.
gen fracwhite = res_white / res_tot * 100

* Scatter with fitted line, District of Columbia tracts only.
twoway (scatter fracwhite avg_distance) (lfit fracwhite avg_distance) ///
	if state == "District of Columbia", ///
	scheme(s2color) graphregion(color(white)) legend(off) ///
	ytitle(Percent White) ///
	xtitle(Average Distance to Job Location in Miles)



* Percentage of residents in each tract above and below the low-earnings
* threshold. Only fraclowwage is used by the graph below.
gen frachighwage = 100 - res_lt1250 / res_tot * 100
gen fraclowwage = res_lt1250 / res_tot * 100

* Scatter with fitted line, District of Columbia tracts only.
* The dollar sign in the y-axis title is escaped with a backslash so that
* Stata prints it literally instead of trying to expand what follows it as
* a global macro, which would remove the amount from the title.
twoway (scatter fraclowwage avg_dist) (lfit fraclowwage avg_dist) ///
if state == "District of Columbia" ///
, legend(off) xtitle(Average Distance to Job Location in Miles) ///
ytitle(Percent with Earnings < \$1250/month) scheme(s2color) graphr(color(white)) ///
ylabel(0(5)30) xlabel(9(1)13)


* Linear fit and correlation between the share of low earners and the
* average distance to jobs, District of Columbia tracts only.
regress fraclowwage avg_distance if state == "District of Columbia"
correlate fraclowwage avg_distance if state == "District of Columbia"




